In this report, we extract information about published JOSS papers and generate graphics as well as a summary table that can be downloaded and used for further analyses.
suppressPackageStartupMessages({
library(tibble)
library(rcrossref)
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)
library(gh)
library(purrr)
library(jsonlite)
library(DT)
library(plotly)
library(citecorp)
library(readr)
})## Keep track of the source of each column
source_track <- c()
## Switch controlling the caption of the (non-interactive) plots: when TRUE
## the caption is today's date, when FALSE it is the empty string
add_date_caption <- TRUE
dcap <- if (add_date_caption) lubridate::today() else ""
## Read archived version of summary data frame, to use for filling in
## information about software repositories (due to limit on API requests)
## Sort by the date when software repo info was last obtained
papers_archive <- readRDS(gzcon(url("https://github.com/openjournals/joss-analytics/blob/gh-pages/joss_submission_analytics.rds?raw=true"))) %>%
dplyr::arrange(!is.na(repo_info_obtained), repo_info_obtained)
## Similarly for citation analysis, to avoid having to pull down the
## same information multiple times
citations_archive <- readr::read_delim(
url("https://github.com/openjournals/joss-analytics/blob/gh-pages/joss_submission_citations.tsv?raw=true"),
col_types = cols(.default = "c"), col_names = TRUE,
delim = "\t")
We get the information about published JOSS papers from Crossref,
using the rcrossref R package. This package is also used to
extract citation counts.
## Fetch JOSS papers from Crossref
## Only 1000 papers can be pulled down at a time, so keep requesting further
## pages (with an increasing offset) for as long as every page so far was full
lim <- 1000
papers <- rcrossref::cr_works(
  filter = c(issn = "2475-9066"),
  limit = lim
)$data
i <- 1
repeat {
  ## Fewer than i * lim rows means the most recent page was not full
  if (nrow(papers) != i * lim) {
    break
  }
  papers <- dplyr::bind_rows(
    papers,
    rcrossref::cr_works(
      filter = c(issn = "2475-9066"),
      limit = lim, offset = i * lim
    )$data
  )
  i <- i + 1
}
## Keep only the records whose type is "journal-article"
papers <- dplyr::filter(papers, type == "journal-article")
## A few papers don't have DOIs - generate them from the URL by stripping the
## DOI resolver prefix. Both the "http://dx.doi.org/" and the
## "https://doi.org/" forms are accepted; the pattern is anchored at the start
## of the string and the dots are escaped so they only match literal dots.
noaltid <- which(is.na(papers$alternative.id))
papers$alternative.id[noaltid] <- sub(
  "^https?://(dx\\.)?doi\\.org/", "",
  papers$url[noaltid]
)
## Get citation info from Crossref and merge with paper details
cit <- rcrossref::cr_citation_count(doi = papers$alternative.id)
papers <- papers %>% dplyr::left_join(
cit %>% dplyr::rename(citation_count = count),
by = c("alternative.id" = "doi")
)
## Remove one duplicated paper
papers <- papers %>% dplyr::filter(alternative.id != "10.21105/joss.00688")
source_track <- c(source_track,
structure(rep("crossref", ncol(papers)),
names = colnames(papers)))
For each published paper, we use the Whedon API to get information about pre-review and review issue numbers, the corresponding software repository, etc.
whedon <- list()
p <- 1
a0 <- NULL
a <- jsonlite::fromJSON(
url(paste0("https://joss.theoj.org/papers/published.json?page=", p)),
simplifyDataFrame = FALSE
)
while (length(a) > 0 && !identical(a, a0)) {
whedon <- c(whedon, a)
p <- p + 1
a0 <- a
a <- tryCatch({
jsonlite::fromJSON(
url(paste0("https://joss.theoj.org/papers/published.json?page=", p)),
simplifyDataFrame = FALSE
)},
error = function(e) return(numeric(0))
)
}
whedon <- do.call(dplyr::bind_rows, lapply(whedon, function(w) {
data.frame(api_title = w$title,
api_state = w$state,
editor = paste(w$editor, collapse = ","),
reviewers = paste(w$reviewers, collapse = ","),
nbr_reviewers = length(w$reviewers),
repo_url = w$software_repository,
review_issue_id = sub("https://github.com/openjournals/joss-reviews/issues/", "", w$paper_review),
doi = w$doi,
prereview_issue_id = ifelse(!is.null(w$meta_review_issue_id),
w$meta_review_issue_id, NA_integer_),
languages = paste(w$languages, collapse = ","),
archive_doi = w$software_archive)
}))
papers <- papers %>% dplyr::left_join(whedon, by = c("alternative.id" = "doi"))
source_track <- c(source_track,
structure(rep("whedon", length(setdiff(colnames(papers),
names(source_track)))),
names = setdiff(colnames(papers), names(source_track))))
From each pre-review and review issue, we extract information about review times and assigned labels.
## Pull down info on all issues in the joss-reviews repository.
## `.limit = Inf` lets gh paginate until every issue has been returned. A
## finite cap silently truncates the listing once the repository holds more
## issues than the cap (issue numbers already exceed 5000).
issues <- gh("/repos/openjournals/joss-reviews/issues",
             .limit = Inf, state = "all")
## From each issue, extract required information
iss <- do.call(dplyr::bind_rows, lapply(issues, function(issue) {
  ## Names of all labels attached to the issue
  label_names <- vapply(issue$labels, getElement, character(1L),
                        name = "name")
  data.frame(
    title = issue$title,
    number = issue$number,
    state = issue$state,
    opened = issue$created_at,
    ## `closed_at` can be NULL; store a missing value in that case
    closed = if (is.null(issue$closed_at)) {
      NA_character_
    } else {
      issue$closed_at
    },
    ncomments = issue$comments,
    ## Comma-separated label names, leaving out the four labels listed here
    labels = paste(
      setdiff(label_names,
              c("review", "pre-review", "query-scope", "paused")),
      collapse = ","
    )
  )
}))
## Split into REVIEW, PRE-REVIEW, and other issues (the latter category
## is discarded)
issother <- iss %>% dplyr::filter(!grepl("\\[PRE REVIEW\\]", title) &
!grepl("\\[REVIEW\\]", title))
dim(issother)## [1] 134 7
head(issother)## title
## 1 org
## 2 @csoneson - I
## 3 Full author name year, rather than last name year, appearing for references to corresponding author
## 4 Example usage: Do the authors include examples of how to use the software (ideally to solve real-world analysis problems).
## 5 JOSS REVIEW- Performance -> Reading saved sites
## 6 ## Review checklist for @mstimberg
## number state opened closed ncomments labels
## 1 5709 closed 2023-07-31T07:23:26Z 2023-07-31T07:23:28Z 1
## 2 5708 closed 2023-07-29T18:30:07Z 2023-07-29T18:30:09Z 1
## 3 5317 closed 2023-03-31T05:29:46Z 2023-03-31T05:29:48Z 1
## 4 5308 closed 2023-03-28T16:27:18Z 2023-03-28T16:27:20Z 1
## 5 5163 closed 2023-02-17T11:48:48Z 2023-02-17T11:48:51Z 1
## 6 5002 closed 2022-12-09T16:15:19Z 2022-12-09T16:15:21Z 1
## For REVIEW issues, generate the DOI of the paper from the issue number
## Return the string of zeros needed to left-pad `s` to five characters,
## e.g. "0" for 1234 and "000" for "42". Values that already have five or
## more characters need no padding and give "" (instead of failing on a
## negative repeat count). Works elementwise on vectors.
getnbrzeros <- function(s) {
  strrep("0", pmax(0L, 5L - nchar(s)))
}
issrev <- iss %>% dplyr::filter(grepl("\\[REVIEW\\]", title)) %>%
dplyr::mutate(nbrzeros = purrr::map_chr(number, getnbrzeros)) %>%
dplyr::mutate(alternative.id = paste0("10.21105/joss.",
nbrzeros,
number)) %>%
dplyr::select(-nbrzeros) %>%
dplyr::mutate(title = gsub("\\[REVIEW\\]: ", "", title)) %>%
dplyr::rename_at(vars(-alternative.id), ~ paste0("review_", .))## For pre-review and review issues, respectively, get the number of
## issues closed each month, and the number of those that have the
## 'rejected' label
## Summarize closed issues by the month in which they were closed.
##   issue_tbl:     data frame with (at least) the columns `title`, `closed`
##                  and `labels`
##   title_pattern: regular expression selecting the issues by title
##   issue_type:    value stored in the `itype` column of the result
## Returns one row per closing month with the number of issues closed and the
## number of those whose labels match "rejected".
summarize_rejections <- function(issue_tbl, title_pattern, issue_type) {
  issue_tbl %>%
    dplyr::filter(grepl(title_pattern, title)) %>%
    dplyr::filter(!is.na(closed)) %>%
    dplyr::mutate(
      closedmonth = lubridate::floor_date(as.Date(closed), "month")
    ) %>%
    dplyr::group_by(closedmonth) %>%
    dplyr::summarize(nbr_issues_closed = length(labels),
                     nbr_rejections = sum(grepl("rejected", labels))) %>%
    dplyr::mutate(itype = issue_type)
}
review_rejected <- summarize_rejections(iss, "\\[REVIEW\\]", "review")
prereview_rejected <- summarize_rejections(iss, "\\[PRE REVIEW\\]",
                                           "pre-review")
all_rejected <- dplyr::bind_rows(review_rejected, prereview_rejected)
## For PRE-REVIEW issues, add information about the corresponding REVIEW
## issue number
isspre <- iss %>% dplyr::filter(grepl("\\[PRE REVIEW\\]", title)) %>%
dplyr::filter(!grepl("withdrawn", labels)) %>%
dplyr::filter(!grepl("rejected", labels))
## Some titles have multiple pre-review issues. In these cases, keep the latest
isspre <- isspre %>% dplyr::arrange(desc(number)) %>%
dplyr::filter(!duplicated(title)) %>%
dplyr::mutate(title = gsub("\\[PRE REVIEW\\]: ", "", title)) %>%
dplyr::rename_all(~ paste0("prerev_", .))
papers <- papers %>% dplyr::left_join(issrev, by = "alternative.id") %>%
dplyr::left_join(isspre, by = c("prereview_issue_id" = "prerev_number")) %>%
dplyr::mutate(prerev_opened = as.Date(prerev_opened),
prerev_closed = as.Date(prerev_closed),
review_opened = as.Date(review_opened),
review_closed = as.Date(review_closed)) %>%
dplyr::mutate(days_in_pre = prerev_closed - prerev_opened,
days_in_rev = review_closed - review_opened,
to_review = !is.na(review_opened))
source_track <- c(source_track,
structure(rep("joss-github", length(setdiff(colnames(papers),
names(source_track)))),
names = setdiff(colnames(papers), names(source_track))))## Reorder so that software repositories that were interrogated longest
## ago are checked first
tmporder <- order(match(papers$alternative.id, papers_archive$alternative.id),
na.last = FALSE)
software_urls <- papers$repo_url[tmporder]
is_github <- grepl("github", software_urls)
length(is_github)## [1] 2133
sum(is_github)## [1] 2009
software_urls[!is_github]## [1] "https://gitlab.com/moerman1/fhi-cc4s"
## [2] "https://gitlab.pasteur.fr/vlegrand/ROCK"
## [3] "https://gitlab.inria.fr/bramas/tbfmm"
## [4] "https://gitlab.com/mmartin-lagarde/exonoodle-exoplanets/-/tree/master/"
## [5] "https://gitlab.com/utopia-project/utopia"
## [6] "https://jugit.fz-juelich.de/compflu/swalbe.jl/"
## [7] "https://gitlab.com/wpettersson/kep_solver"
## [8] "https://gitlab.com/ENKI-portal/ThermoCodegen"
## [9] "https://gitlab.com/fduchate/predihood"
## [10] "https://gitlab.kuleuven.be/ITSCreaLab/public-toolboxes/dyntapy"
## [11] "https://bitbucket.org/meg/cbcbeat"
## [12] "https://gitlab.com/dmt-development/dmt-core"
## [13] "https://gitlab.com/pyFBS/pyFBS"
## [14] "https://gitlab.dune-project.org/dorie/dorie"
## [15] "https://bitbucket.org/orionmhdteam/orion2_release1/src/master/"
## [16] "https://gitlab.mpikg.mpg.de/curcuraci/bmiptools"
## [17] "https://git.ligo.org/asimov/asimov"
## [18] "https://gitlab.com/myqueue/myqueue"
## [19] "https://gitlab.com/jason-rumengan/pyarma"
## [20] "https://gitlab.com/fibreglass/pivc"
## [21] "https://gite.lirmm.fr/doccy/RedOak"
## [22] "https://savannah.nongnu.org/projects/complot/"
## [23] "https://gitlab.inria.fr/miet/miet"
## [24] "https://gitlab.com/cosmograil/starred"
## [25] "https://bitbucket.org/hammurabicode/hamx"
## [26] "https://bitbucket.org/berkeleylab/hardware-control/src/main/"
## [27] "http://mutabit.com/repos.fossil/grafoscopio/"
## [28] "https://gitlab.com/gdetor/genetic_alg"
## [29] "https://gitlab.com/cerfacs/batman"
## [30] "https://gricad-gitlab.univ-grenoble-alpes.fr/ttk/spam/"
## [31] "https://gitlab.com/manchester_qbi/manchester_qbi_public/madym_cxx/"
## [32] "https://gitlab.inria.fr/bcoye/game-engine-scheduling-simulation"
## [33] "https://gitlab.com/utopia-project/dantro"
## [34] "https://gitlab.com/culturalcartography/text2map"
## [35] "https://gitlab.com/petsc/petsc"
## [36] "https://gitlab.com/ffaucher/hawen"
## [37] "https://bitbucket.org/cardosan/brightway2-temporalis"
## [38] "https://gitlab.com/libreumg/dataquier.git"
## [39] "https://bitbucket.org/manuela_s/hcp/"
## [40] "https://gitlab.com/emd-dev/emd"
## [41] "https://gitlab.com/ProjectRHEA/flowsolverrhea"
## [42] "https://framagit.org/GustaveCoste/off-product-environmental-impact/"
## [43] "https://bitbucket.org/rram/dvrlib/src/joss/"
## [44] "https://gitlab.ethz.ch/holukas/dyco-dynamic-lag-compensation"
## [45] "https://gitlab.com/picos-api/picos"
## [46] "https://gitlab.com/project-dare/dare-platform"
## [47] "https://bitbucket.org/clhaley/Multitaper.jl"
## [48] "https://gitlab.uliege.be/smart_grids/public/gboml"
## [49] "https://gitlab.com/dlr-dw/ontocode"
## [50] "https://gitlab.com/vibes-developers/vibes"
## [51] "https://gitlab.com/marinvaders/marinvaders"
## [52] "https://gitlab.com/sails-dev/sails"
## [53] "https://bitbucket.org/bmskinner/nuclear_morphology"
## [54] "https://gitlab.com/InspectorCell/inspectorcell"
## [55] "https://gitlab.com/jesseds/apav"
## [56] "https://gitlab.com/dlr-ve/esy/amiris/amiris"
## [57] "https://gitlab.inria.fr/melissa/melissa"
## [58] "https://gitlab.com/binary_c/binary_c-python/"
## [59] "https://git.rwth-aachen.de/ants/sensorlab/imea"
## [60] "https://plmlab.math.cnrs.fr/lmrs/statistique/smmR"
## [61] "https://gitlab.com/sissopp_developers/sissopp"
## [62] "https://earth.bsc.es/gitlab/wuruchi/autosubmitreact"
## [63] "https://gitlab.gwdg.de/mpievolbio-it/crbhits"
## [64] "https://gitlab.com/remram44/taguette"
## [65] "https://bitbucket.org/mpi4py/mpi4py-fft"
## [66] "https://gitlab.com/cracklet/cracklet.git"
## [67] "https://bitbucket.org/sciencecapsule/sciencecapsule"
## [68] "https://gitlab.kitware.com/LBM/lattice-boltzmann-solver"
## [69] "https://gitlab.com/QComms/cqptoolkit"
## [70] "https://gitlab.com/eidheim/Simple-Web-Server"
## [71] "https://gitlab.com/fame-framework/fame-io"
## [72] "https://gitlab.com/toposens/public/ros-packages"
## [73] "https://gitlab.inria.fr/azais/treex"
## [74] "https://gitlab.com/pvst/asi"
## [75] "https://gitlab.com/thartwig/asloth"
## [76] "https://bitbucket.org/cdegroot/wediff"
## [77] "https://bitbucket.org/basicsums/basicsums"
## [78] "https://framagit.org/GustaveCoste/eldam"
## [79] "https://gitlab.com/dsbowen/conditional-inference"
## [80] "https://code.usgs.gov/umesc/quant-ecology/fishstan/"
## [81] "https://git.geomar.de/digital-earth/dasf/dasf-messaging-python"
## [82] "https://gitlab.ifremer.fr/resourcecode/resourcecode"
## [83] "https://bitbucket.org/glotzer/rowan"
## [84] "https://gitlab.com/fame-framework/fame-core"
## [85] "https://gitlab.com/sigcorr/sigcorr"
## [86] "https://www.idpoisson.fr/fullswof/"
## [87] "https://gitlab.com/moorepants/skijumpdesign"
## [88] "https://git.iws.uni-stuttgart.de/tools/frackit"
## [89] "https://gitlab.com/cosmograil/PyCS3"
## [90] "https://bitbucket.org/miketuri/perl-spice-sim-seus/"
## [91] "https://bitbucket.org/ocellarisproject/ocellaris"
## [92] "https://gitlab.inria.fr/mosaic/bvpy"
## [93] "https://bitbucket.org/berkeleylab/esdr-pygdh/"
## [94] "https://gitlab.com/davidtourigny/dynamic-fba"
## [95] "https://gitlab.com/dlr-ve/autumn/"
## [96] "https://sourceforge.net/p/mcapl/mcapl_code/ci/master/tree/"
## [97] "https://gitlab.com/ags-data-format-wg/ags-python-library"
## [98] "https://bitbucket.org/dolfin-adjoint/pyadjoint"
## [99] "https://bitbucket.org/likask/mofem-cephas"
## [100] "https://gitlab.com/drti/basic-tools"
## [101] "https://gitlab.com/chaver/choco-mining"
## [102] "https://gitlab.com/LMSAL_HUB/aia_hub/aiapy"
## [103] "https://bitbucket.org/cmutel/brightway2"
## [104] "https://gitlab.com/materials-modeling/wulffpack"
## [105] "https://gitlab.ruhr-uni-bochum.de/reichp2y/proppy"
## [106] "https://git.mpib-berlin.mpg.de/castellum/castellum"
## [107] "https://gitlab.com/permafrostnet/teaspoon"
## [108] "https://gitlab.com/dlr-ve/esy/sfctools/framework/"
## [109] "https://gitlab.com/tesch1/cppduals"
## [110] "https://gitlab.com/celliern/scikit-fdiff/"
## [111] "https://gitlab.com/ampere2/metalwalls"
## [112] "https://gitlab.com/geekysquirrel/bigx"
## [113] "https://bitbucket.org/cloopsy/android/"
## [114] "https://bitbucket.org/dghoshal/frieda"
## [115] "https://gitlab.com/programgreg/tagginglatencyestimator"
## [116] "https://doi.org/10.17605/OSF.IO/3DS6A"
## [117] "https://gitlab.com/energyincities/besos/"
## [118] "https://gitlab.com/dglaeser/fieldcompare"
## [119] "https://gitlab.com/gims-developers/gims"
## [120] "https://bitbucket.org/mituq/muq2.git"
## [121] "https://gitlab.awi.de/sicopolis/sicopolis"
## [122] "https://gitlab.com/costrouc/pysrim"
## [123] "https://gitlab.com/datafold-dev/datafold/"
## [124] "https://c4science.ch/source/tamaas/"
## Retrieve metadata for one GitHub-hosted software repository.
##   u: repository URL as recorded for the paper
## Returns a one-row data frame (with the unmodified `u` in `repo_url`), or
## NULL when the repository record itself cannot be retrieved from the API.
get_repo_info <- function(u) {
  ## Normalize the URL: drop a trailing slash and ".git" suffix, force https
  u0 <- gsub("^http://", "https://", gsub("\\.git$", "", gsub("/$", "", u)))
  ## Keep only the part in front of any /tree/ or /blob/ path component
  for (marker in c("/tree/", "/blob/")) {
    if (grepl(marker, u0)) {
      u0 <- strsplit(u0, marker)[[1]][1]
    }
  }
  ## API endpoint of the repository; computed once, reused for every request
  endpoint <- gsub("(https://)?(www\\.)?github\\.com/", "/repos/", u0)

  info <- try({
    gh(endpoint)
  })
  ## Without the repository record no row is produced, so return before
  ## issuing the remaining three requests
  if (inherits(info, "try-error") || length(info) <= 1) {
    return(NULL)
  }

  languages <- try({
    gh(paste0(endpoint, "/languages"), .limit = 500)
  })
  topics <- try({
    gh(paste0(endpoint, "/topics"),
       .accept = "application/vnd.github.mercy-preview+json", .limit = 500)
  })
  contribs <- try({
    gh(paste0(endpoint, "/contributors"), .limit = 500)
  })

  ## Number of contributors, and number of those with at least 2 contributions
  if (inherits(contribs, "try-error") || length(contribs) == 0) {
    repo_nbr_contribs <- repo_nbr_contribs_2ormore <- NA_integer_
  } else {
    repo_nbr_contribs <- length(contribs)
    repo_nbr_contribs_2ormore <- sum(
      vapply(contribs, function(x) x$contributions >= 2, logical(1))
    )
    if (is.na(repo_nbr_contribs_2ormore)) {
      print(contribs)
    }
  }

  ## Languages encoded as "language:bytes" pairs separated by commas
  if (inherits(languages, "try-error") || length(languages) == 0) {
    repolang <- ""
  } else {
    repolang <- paste(paste(names(unlist(languages)),
                            unlist(languages), sep = ":"), collapse = ",")
  }

  ## Comma-separated repository topics
  if (inherits(topics, "try-error") || length(topics$names) == 0) {
    repotopics <- ""
  } else {
    repotopics <- paste(unlist(topics$names), collapse = ",")
  }

  data.frame(
    repo_url = u,
    repo_created = info$created_at,
    repo_updated = info$updated_at,
    repo_pushed = info$pushed_at,
    repo_nbr_stars = info$stargazers_count,
    repo_language = if (is.null(info$language)) {
      NA_character_
    } else {
      info$language
    },
    repo_languages_bytes = repolang,
    repo_topics = repotopics,
    repo_license = if (is.null(info$license)) {
      NA_character_
    } else {
      info$license$key
    },
    repo_nbr_contribs = repo_nbr_contribs,
    repo_nbr_contribs_2ormore = repo_nbr_contribs_2ormore
  )
}

df <- do.call(
  dplyr::bind_rows,
  ## Query each distinct URL only once: this saves API requests and avoids
  ## getting two differing rows for the same repository
  lapply(unique(software_urls[is_github]), get_repo_info)
) %>%
  dplyr::mutate(repo_created = as.Date(repo_created),
                repo_updated = as.Date(repo_updated),
                repo_pushed = as.Date(repo_pushed)) %>%
  dplyr::distinct() %>%
  dplyr::mutate(repo_info_obtained = lubridate::today())
stopifnot(length(unique(df$repo_url)) == length(df$repo_url))
dim(df)
## For papers not in df (i.e., for which we didn't get a valid response
## from the GitHub API query), use information from the archived data frame
dfarchive <- papers_archive %>%
dplyr::select(colnames(df)[colnames(df) %in% colnames(papers_archive)]) %>%
dplyr::filter(!(repo_url %in% df$repo_url))
df <- dplyr::bind_rows(df, dfarchive)
papers <- papers %>% dplyr::left_join(df, by = "repo_url")
source_track <- c(source_track,
structure(rep("sw-github", length(setdiff(colnames(papers),
names(source_track)))),
names = setdiff(colnames(papers), names(source_track))))## Convert publication date to Date format
## Add information about the half year (H1, H2) of publication
## Count number of authors
papers <- papers %>% dplyr::select(-reference, -license, -link) %>%
dplyr::mutate(published.date = as.Date(published.print)) %>%
dplyr::mutate(
halfyear = paste0(year(published.date),
ifelse(month(published.date) <= 6, "H1", "H2"))
) %>% dplyr::mutate(
halfyear = factor(halfyear,
levels = paste0(rep(sort(unique(year(published.date))),
each = 2), c("H1", "H2")))
) %>% dplyr::mutate(nbr_authors = vapply(author, function(a) nrow(a), NA_integer_))
papers <- papers %>% dplyr::distinct()
source_track <- c(source_track,
structure(rep("cleanup", length(setdiff(colnames(papers),
names(source_track)))),
names = setdiff(colnames(papers), names(source_track))))
In some cases, fetching information from (e.g.) the GitHub API fails for a subset of the publications. There are also other reasons for missing values (for example, the earliest submissions do not have an associated pre-review issue). The table below lists the number of missing values for each of the variables in the data frame.
DT::datatable(
data.frame(variable = colnames(papers),
nbr_missing = colSums(is.na(papers))) %>%
dplyr::mutate(source = source_track[variable]),
escape = FALSE, rownames = FALSE,
filter = list(position = 'top', clear = FALSE),
options = list(scrollX = TRUE)
)ggplot(papers %>%
dplyr::mutate(pubmonth = lubridate::floor_date(published.date, "month")) %>%
dplyr::group_by(pubmonth) %>%
dplyr::summarize(npub = n()),
aes(x = factor(pubmonth), y = npub)) +
geom_bar(stat = "identity") + theme_minimal() +
labs(x = "", y = "Number of published papers per month", caption = dcap) +
theme(axis.title = element_text(size = 15),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))ggplot(papers %>%
dplyr::mutate(pubyear = lubridate::year(published.date)) %>%
dplyr::group_by(pubyear) %>%
dplyr::summarize(npub = n()),
aes(x = factor(pubyear), y = npub)) +
geom_bar(stat = "identity") + theme_minimal() +
labs(x = "", y = "Number of published papers per year", caption = dcap) +
theme(axis.title = element_text(size = 15),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
The plots below illustrate the fraction of pre-review and review issues closed during each month that have the ‘rejected’ label attached.
ggplot(all_rejected,
aes(x = factor(closedmonth), y = nbr_rejections/nbr_issues_closed)) +
geom_bar(stat = "identity") +
theme_minimal() +
facet_wrap(~ itype, ncol = 1) +
labs(x = "Month of issue closing", y = "Fraction of issues rejected",
caption = dcap) +
theme(axis.title = element_text(size = 15),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
Papers with 20 or more citations are grouped in the “>=20” category.
ggplot(papers %>%
dplyr::mutate(citation_count = replace(citation_count,
citation_count >= 20, ">=20")) %>%
dplyr::mutate(citation_count = factor(citation_count,
levels = c(0:20, ">=20"))) %>%
dplyr::group_by(citation_count) %>%
dplyr::tally(),
aes(x = citation_count, y = n)) +
geom_bar(stat = "identity") +
theme_minimal() +
labs(x = "Crossref citation count", y = "Number of publications", caption = dcap)
The table below sorts the JOSS papers in decreasing order by the number of citations in Crossref.
## Table of papers, sorted by decreasing Crossref citation count; the URL is
## rendered as a clickable link (hence escape = FALSE)
DT::datatable(
  papers %>%
    dplyr::mutate(url = paste0("<a href='", url, "' target='_blank'>",
                               url,"</a>")) %>%
    dplyr::arrange(desc(citation_count)) %>%
    dplyr::select(title, url, published.date, citation_count),
  escape = FALSE,
  filter = list(position = 'top', clear = FALSE),
  options = list(scrollX = TRUE)
)
## Interactive scatter plot of citation count against publication date
plotly::ggplotly(
  ggplot(papers, aes(x = published.date, y = citation_count, label = title)) +
    geom_point(alpha = 0.5) + theme_bw() + scale_y_sqrt() +
    ## `label` is only used for the tooltips of the points. Unmap it for the
    ## smoother, which would otherwise inherit it and warn that the aesthetic
    ## was dropped during the statistical transformation.
    geom_smooth(aes(label = NULL)) +
    labs(x = "Date of publication", y = "Crossref citation count",
         caption = dcap) +
    theme(axis.title = element_text(size = 15)),
  tooltip = c("label", "x", "y")
)
Here, we plot the citation count for all papers published within each half year, sorted in decreasing order.
ggplot(papers %>% dplyr::group_by(halfyear) %>%
dplyr::arrange(desc(citation_count)) %>%
dplyr::mutate(idx = seq_along(citation_count)),
aes(x = idx, y = citation_count)) +
geom_point(alpha = 0.5) +
facet_wrap(~ halfyear, scales = "free") +
theme_bw() +
labs(x = "Index", y = "Crossref citation count", caption = dcap)
In these plots we investigate whether the time a submission spends in the pre-review or review stage (or their sum) has changed over time. The blue curve is a rolling median, computed over the submissions within a 120-day window.
## Helper functions (modified from https://stackoverflow.com/questions/65147186/geom-smooth-with-median-instead-of-mean)
## Rolling-median smoother.
##   formula: accepted but not used
##   data:    object with elements `x` and `y`
##   xwindow: total width of the window, in units of x
##   ...:     accepted but not used
## Returns an object of class "rollmed": a list with the sorted x-values
## (`x`), the smoothed y-values (`y`) and a function (`f`) interpolating
## between them.
rolling_median <- function(formula, data, xwindow = 120, ...) {
  ## Sort the observations by x
  idx <- order(data$x)
  x <- data$x[idx]
  y <- data$y[idx]
  half_width <- xwindow / 2
  ## Smoothed values, filled in one distinct (non-missing) x-value at a time
  smoothed <- rep(NA, length(x))
  for (center in setdiff(unique(x), NA)) {
    ## Observations strictly inside the open window around `center`
    in_window <- which(x > center - half_width & x < center + half_width)
    smoothed[which(x == center)] <- median(y[in_window], na.rm = TRUE)
  }
  y <- smoothed
  structure(list(x = x, y = y, f = approxfun(x, y)), class = "rollmed")
}
## S3 predict method for objects of class "rollmed": evaluates the
## interpolation function stored in `mod$f` at `newdata$x` and returns the
## resulting values, named by the corresponding x-values. Additional
## arguments passed via `...` are ignored.
predict.rollmed <- function(mod, newdata, ...) {
setNames(mod$f(newdata$x), newdata$x)
}ggplot(papers, aes(x = prerev_opened, y = as.numeric(days_in_pre))) +
geom_point() +
geom_smooth(formula = y ~ x, method = "rolling_median",
se = FALSE, method.args = list(xwindow = 120)) +
theme_bw() +
labs(x = "Date of pre-review opening", y = "Number of days in pre-review",
caption = dcap) +
theme(axis.title = element_text(size = 15))ggplot(papers, aes(x = review_opened, y = as.numeric(days_in_rev))) +
geom_point() +
geom_smooth(formula = y ~ x, method = "rolling_median",
se = FALSE, method.args = list(xwindow = 120)) +
theme_bw() +
labs(x = "Date of review opening", y = "Number of days in review",
caption = dcap) +
theme(axis.title = element_text(size = 15))ggplot(papers, aes(x = prerev_opened,
y = as.numeric(days_in_pre) + as.numeric(days_in_rev))) +
geom_point() +
geom_smooth(formula = y ~ x, method = "rolling_median",
se = FALSE, method.args = list(xwindow = 120)) +
theme_bw() +
labs(x = "Date of pre-review opening", y = "Number of days in pre-review + review",
caption = dcap) +
theme(axis.title = element_text(size = 15))
Next, we consider the languages used by the submissions, both as reported by Whedon and based on the information encoded in available GitHub repositories (for the latter, we also record the number of bytes of code written in each language). Note that a given submission can use multiple languages.
## Language information from Whedon
sspl <- strsplit(papers$languages, ",")
all_languages <- unique(unlist(sspl))
langs <- do.call(dplyr::bind_rows, lapply(all_languages, function(l) {
data.frame(language = l,
nbr_submissions_Whedon = sum(vapply(sspl, function(v) l %in% v, 0)))
}))
## Language information from GitHub software repos
a <- lapply(strsplit(papers$repo_languages_bytes, ","), function(w) strsplit(w, ":"))
a <- a[sapply(a, length) > 0]
langbytes <- as.data.frame(t(as.data.frame(a))) %>%
setNames(c("language", "bytes")) %>%
dplyr::mutate(bytes = as.numeric(bytes)) %>%
dplyr::filter(!is.na(language)) %>%
dplyr::group_by(language) %>%
dplyr::summarize(nbr_bytes_GitHub = sum(bytes),
nbr_repos_GitHub = length(bytes)) %>%
dplyr::arrange(desc(nbr_bytes_GitHub))
langs <- dplyr::full_join(langs, langbytes, by = "language")ggplot(langs %>% dplyr::arrange(desc(nbr_submissions_Whedon)) %>%
dplyr::filter(nbr_submissions_Whedon > 10) %>%
dplyr::mutate(language = factor(language, levels = language)),
aes(x = language, y = nbr_submissions_Whedon)) +
geom_bar(stat = "identity") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
labs(x = "", y = "Number of submissions", caption = dcap) +
theme(axis.title = element_text(size = 15))DT::datatable(
langs %>% dplyr::arrange(desc(nbr_bytes_GitHub)),
escape = FALSE,
filter = list(position = 'top', clear = FALSE),
options = list(scrollX = TRUE)
)ggplot(langs, aes(x = nbr_repos_GitHub, y = nbr_bytes_GitHub)) +
geom_point() + scale_x_log10() + scale_y_log10() + geom_smooth() +
theme_bw() +
labs(x = "Number of repos using the language",
y = "Total number of bytes of code\nwritten in the language",
caption = dcap) +
theme(axis.title = element_text(size = 15))ggplotly(
ggplot(papers, aes(x = citation_count, y = repo_nbr_stars,
label = title)) +
geom_point(alpha = 0.5) + scale_x_sqrt() + scale_y_sqrt() +
theme_bw() +
labs(x = "Crossref citation count", y = "Number of stars, GitHub repo",
caption = dcap) +
theme(axis.title = element_text(size = 15)),
tooltip = c("label", "x", "y")
)ggplot(papers, aes(x = as.numeric(prerev_opened - repo_created))) +
geom_histogram(bins = 50) +
theme_bw() +
labs(x = "Time (days) from repo creation to JOSS pre-review start",
caption = dcap) +
theme(axis.title = element_text(size = 15))ggplot(papers, aes(x = as.numeric(repo_pushed - review_closed))) +
geom_histogram(bins = 50) +
theme_bw() +
labs(x = "Time (days) from closure of JOSS review to most recent commit in repo",
caption = dcap) +
theme(axis.title = element_text(size = 15)) +
facet_wrap(~ year(published.date), scales = "free_y")
Submissions associated with rOpenSci and pyOpenSci are not considered here, since they are not explicitly reviewed at JOSS.
ggplot(papers %>%
dplyr::filter(!grepl("rOpenSci|pyOpenSci", prerev_labels)) %>%
dplyr::mutate(year = year(published.date)),
aes(x = nbr_reviewers)) + geom_bar() +
facet_wrap(~ year) + theme_bw() +
labs(x = "Number of reviewers", y = "Number of submissions", caption = dcap)
Submissions associated with rOpenSci and pyOpenSci are not considered here, since they are not explicitly reviewed at JOSS.
reviewers <- papers %>%
dplyr::filter(!grepl("rOpenSci|pyOpenSci", prerev_labels)) %>%
dplyr::mutate(year = year(published.date)) %>%
dplyr::select(reviewers, year) %>%
tidyr::separate_rows(reviewers, sep = ",")
## Most active reviewers
DT::datatable(
reviewers %>% dplyr::group_by(reviewers) %>%
dplyr::summarize(nbr_reviews = length(year),
timespan = paste(unique(c(min(year), max(year))),
collapse = " - ")) %>%
dplyr::arrange(desc(nbr_reviews)),
escape = FALSE, rownames = FALSE,
filter = list(position = 'top', clear = FALSE),
options = list(scrollX = TRUE)
)ggplot(papers %>%
dplyr::mutate(year = year(published.date),
`r/pyOpenSci` = factor(
grepl("rOpenSci|pyOpenSci", prerev_labels),
levels = c("TRUE", "FALSE"))),
aes(x = editor)) + geom_bar(aes(fill = `r/pyOpenSci`)) +
theme_bw() + facet_wrap(~ year, ncol = 1) +
scale_fill_manual(values = c(`TRUE` = "grey65", `FALSE` = "grey35")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
labs(x = "Editor", y = "Number of submissions", caption = dcap)all_licenses <- sort(unique(papers$repo_license))
license_levels = c(grep("apache", all_licenses, value = TRUE),
grep("bsd", all_licenses, value = TRUE),
grep("mit", all_licenses, value = TRUE),
grep("gpl", all_licenses, value = TRUE),
grep("mpl", all_licenses, value = TRUE))
license_levels <- c(license_levels, setdiff(all_licenses, license_levels))
ggplot(papers %>%
dplyr::mutate(repo_license = factor(repo_license,
levels = license_levels)),
aes(x = repo_license)) +
geom_bar() +
theme_bw() +
labs(x = "Software license", y = "Number of submissions", caption = dcap) +
theme(axis.title = element_text(size = 15),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
facet_wrap(~ year(published.date), scales = "free_y")## For plots below, replace licenses present in less
## than 2.5% of the submissions by 'other'
tbl <- table(papers$repo_license)
to_replace <- names(tbl[tbl <= 0.025 * nrow(papers)])ggplot(papers %>%
dplyr::mutate(year = year(published.date)) %>%
dplyr::mutate(repo_license = replace(repo_license,
repo_license %in% to_replace,
"other")) %>%
dplyr::mutate(year = factor(year),
repo_license = factor(
repo_license,
levels = license_levels[license_levels %in% repo_license]
)) %>%
dplyr::group_by(year, repo_license, .drop = FALSE) %>%
dplyr::count() %>%
dplyr::mutate(year = as.integer(as.character(year))),
aes(x = year, y = n, fill = repo_license)) + geom_area() +
theme_minimal() +
scale_fill_brewer(palette = "Set1", name = "Software\nlicense",
na.value = "grey") +
theme(axis.title = element_text(size = 15)) +
labs(x = "Year", y = "Number of submissions", caption = dcap)ggplot(papers %>%
dplyr::mutate(year = year(published.date)) %>%
dplyr::mutate(repo_license = replace(repo_license,
repo_license %in% to_replace,
"other")) %>%
dplyr::mutate(year = factor(year),
repo_license = factor(
repo_license,
levels = license_levels[license_levels %in% repo_license]
)) %>%
dplyr::group_by(year, repo_license, .drop = FALSE) %>%
dplyr::summarize(n = n()) %>%
dplyr::mutate(freq = n/sum(n)) %>%
dplyr::mutate(year = as.integer(as.character(year))),
aes(x = year, y = freq, fill = repo_license)) + geom_area() +
theme_minimal() +
scale_fill_brewer(palette = "Set1", name = "Software\nlicense",
na.value = "grey") +
theme(axis.title = element_text(size = 15)) +
labs(x = "Year", y = "Fraction of submissions", caption = dcap)a <- unlist(strsplit(papers$repo_topics, ","))
## Remove missing topics and count the occurrences of each topic
a <- a[!is.na(a)]
topicfreq <- table(a)

## Word cloud of the topics; word size is based on the square root of the
## topic count. The seed is fixed before the word cloud is drawn.
colors <- viridis::viridis(100)
set.seed(1234)
wordcloud::wordcloud(
  words = names(topicfreq),
  freq = sqrt(topicfreq),
  scale = c(10, 0.1),
  min.freq = 1,
  max.words = 300,
  random.order = FALSE,
  random.color = TRUE,
  rot.per = 0.05,
  colors = colors,
  ordered.colors = FALSE,
  use.r.layout = FALSE,
  vfont = c("serif", "plain")
)

## Interactive table of topic counts, most frequent topic first. The first
## column of the converted table is named 'a' after the tabulated vector.
topicfreq %>%
  as.data.frame() %>%
  dplyr::rename(topic = a, nbr_repos = Freq) %>%
  dplyr::arrange(desc(nbr_repos)) %>%
  DT::datatable(
    escape = FALSE, rownames = FALSE,
    filter = list(position = 'top', clear = FALSE),
    options = list(scrollX = TRUE)
  )
Here, we take a more detailed look at the papers that cite JOSS papers, using data from the Open Citations Corpus.
## Retrieve all citations of the JOSS papers via citecorp::oc_coci_cites(),
## remove duplicated records and record today's date as the retrieval date.
## If the query fails, the reason is reported and `citations` is set to
## NULL, in which case the archived citation table is used further below.
citations <- tryCatch({
  citecorp::oc_coci_cites(doi = papers$alternative.id) %>%
    dplyr::distinct() %>%
    dplyr::mutate(citation_info_obtained = as.character(lubridate::today()))
}, error = function(e) {
  ## Report why the query failed instead of silently discarding the error
  message("Could not retrieve citation data from Open Citations: ",
          conditionMessage(e))
  NULL
})
dim(citations)## [1] 29729 8
## Combine newly retrieved citations with the archived citation table.
## If no new citation data could be retrieved (citations is NULL), fall
## back to the archive alone.
if (!is.null(citations)) {
  ## Keep only citation records that are not yet in the archive
  citations <- citations %>%
    dplyr::filter(!(oci %in% citations_archive$oci))
  ## Only query Crossref if there are new citations to annotate.
  ## NOTE(review): the behaviour of cr_works() for an empty vector of DOIs
  ## was not verified; skipping the call avoids depending on it.
  if (nrow(citations) > 0) {
    ## Get information about the citing publications from Crossref
    tmpj <- rcrossref::cr_works(dois = unique(citations$citing))$data %>%
      dplyr::select(contains("doi"), contains("container.title"),
                    contains("issn"), contains("type"),
                    contains("publisher"), contains("prefix"))
    citations <- citations %>%
      dplyr::left_join(tmpj, by = c("citing" = "doi"))
    ## bioRxiv preprints don't have a 'container.title' or 'issn', but we'll
    ## assume that they can be identified from the prefix 10.1101 - set the
    ## container.title for these records manually; we may or may not want to
    ## count these (would it count citations twice, both preprint and
    ## publication?)
    ## %in% is used instead of == so that records without a prefix (NA) are
    ## treated as non-matching
    citations$container.title[citations$prefix %in% "10.1101"] <- "bioRxiv"
    ## JOSS is represented by 'The Journal of Open Source Software' as well
    ## as 'Journal of Open Source Software'
    citations$container.title[citations$container.title %in%
                                "Journal of Open Source Software"] <-
      "The Journal of Open Source Software"
    ## Remove real self citations (cited DOI = citing DOI)
    citations <- citations %>% dplyr::filter(cited != citing)
  }
  ## Merge with the archive
  citations <- dplyr::bind_rows(citations, citations_archive)
} else {
  citations <- citations_archive
  ## Make sure that the retrieval date column exists
  if (is.null(citations[["citation_info_obtained"]])) {
    citations$citation_info_obtained <- NA_character_
  }
}
## Citation records without a retrieval date get the fixed date 2021-08-11
citations <- citations %>%
  dplyr::mutate(citation_info_obtained = replace(
    citation_info_obtained, is.na(citation_info_obtained), "2021-08-11"
  ))
## Save the combined citation table as a tab-separated text file
write.table(citations, file = "joss_submission_citations.tsv",
            sep = "\t", quote = FALSE,
            row.names = FALSE, col.names = TRUE)
## Latest successful update of new citation data
max(as.Date(citations$citation_info_obtained))## [1] "2023-02-01"
## Number of JOSS papers with >0 citations included in this collection
length(unique(citations$cited))## [1] 1221
## Number of JOSS papers with >0 citations according to Crossref
length(which(papers$citation_count > 0))## [1] 1471
## Number of citations from Open Citations Corpus vs Crossref
## Join the Crossref citation count of each paper with the number of
## citation records per cited DOI in `citations` (column n). The full join
## keeps papers without any record in `citations` as well as cited DOIs
## that are not in `papers`.
df0 <- papers %>% dplyr::select(doi, citation_count) %>%
  dplyr::full_join(citations %>% dplyr::group_by(cited) %>%
                     dplyr::tally(),
                   by = c("doi" = "cited")) %>%
  ## Papers without a match in `citations` get n = NA from the join; set
  ## these to 0. This must be done after the join, since tally() itself
  ## never returns NA.
  dplyr::mutate(n = replace(n, is.na(n), 0))
## Total citation count Crossref
sum(df0$citation_count, na.rm = TRUE)## [1] 42243
## Total citation count Open Citations Corpus
sum(df0$n, na.rm = TRUE)## [1] 28454
## Ratio of total citation count Open Citations Corpus/Crossref
sum(df0$n, na.rm = TRUE)/sum(df0$citation_count, na.rm = TRUE)## [1] 0.6735791
## Scatter plot of the citation counts from the two sources, one point per
## row of df0; the line has slope 1 and intercept 0. The plot object is
## built once and reused for the zoomed-in version.
citcount_plot <- ggplot(df0, aes(x = citation_count, y = n)) +
  geom_abline(slope = 1, intercept = 0) +
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "Crossref citation count",
       y = "Open Citations Corpus citation count",
       caption = dcap) +
  theme_bw()
citcount_plot
## Zoom in
citcount_plot +
  coord_cartesian(xlim = c(0, 75), ylim = c(0, 75))
## Number of journals citing JOSS papers
length(unique(citations$container.title))## [1] 6142
length(unique(citations$issn))## [1] 4749
## Summarize the citations by citing journal (container.title): number of
## citations of JOSS papers, number of distinct JOSS papers cited, number of
## distinct citing papers, and number/fraction of citations flagged as
## author self-citations (author_sc == "yes"). Sorted by the number of
## cited JOSS papers, in decreasing order.
topcit <- citations %>%
  dplyr::group_by(container.title) %>%
  dplyr::summarize(
    nbr_citations_of_joss_papers = dplyr::n(),
    nbr_cited_joss_papers = dplyr::n_distinct(cited),
    nbr_citing_papers = dplyr::n_distinct(citing),
    nbr_selfcitations_of_joss_papers = sum(author_sc == "yes"),
    fraction_selfcitations = signif(
      nbr_selfcitations_of_joss_papers / nbr_citations_of_joss_papers,
      digits = 3
    )
  ) %>%
  dplyr::arrange(dplyr::desc(nbr_cited_joss_papers))
## Interactive table of the per-journal citation summary
topcit %>%
  DT::datatable(
    escape = FALSE, rownames = FALSE,
    filter = list(position = 'top', clear = FALSE),
    options = list(scrollX = TRUE)
  )

## Scatter plot with one point per citing journal: number of cited JOSS
## papers vs number of citations of JOSS papers. The ggplot object is built
## once and converted to an interactive plot twice (full range and zoomed).
journal_plot <- ggplot(topcit,
                       aes(x = nbr_citations_of_joss_papers,
                           y = nbr_cited_joss_papers,
                           label = container.title)) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey") +
  geom_point(size = 3, alpha = 0.5) +
  theme_bw() +
  labs(caption = dcap, x = "Number of citations of JOSS papers",
       y = "Number of cited JOSS papers")
plotly::ggplotly(journal_plot)
## Same plot, restricted to at most 100 citations and 50 cited papers
plotly::ggplotly(
  journal_plot +
    coord_cartesian(xlim = c(0, 100), ylim = c(0, 50))
)
## Save the per-journal summary as a tab-separated text file
write.table(topcit, file = "joss_submission_citations_byjournal.tsv",
            sep = "\t", quote = FALSE,
            row.names = FALSE, col.names = TRUE)
The tibble object with all data collected above is serialized to a file that can be downloaded and reused.
head(papers) %>% as.data.frame()## alternative.id container.title created deposited
## 1 10.21105/joss.02013 Journal of Open Source Software 2020-02-10 2020-02-10
## 2 10.21105/joss.05174 Journal of Open Source Software 2023-05-18 2023-05-18
## 3 10.21105/joss.05162 Journal of Open Source Software 2023-05-17 2023-05-17
## 4 10.21105/joss.00012 The Journal of Open Source Software 2016-05-16 2017-10-24
## 5 10.21105/joss.02181 Journal of Open Source Software 2020-07-16 2020-07-16
## 6 10.21105/joss.03771 Journal of Open Source Software 2021-12-28 2021-12-28
## published.print doi indexed issn issue issued
## 1 2020-02-10 10.21105/joss.02013 2023-05-19 2475-9066 46 2020-02-10
## 2 2023-05-18 10.21105/joss.05174 2023-05-19 2475-9066 85 2023-05-18
## 3 2023-05-17 10.21105/joss.05162 2023-05-18 2475-9066 85 2023-05-17
## 4 2016-05-16 10.21105/joss.00012 2023-05-19 2475-9066 1 2016-05-16
## 5 2020-07-16 10.21105/joss.02181 2023-06-16 2475-9066 51 2020-07-16
## 6 2021-12-28 10.21105/joss.03771 2023-06-19 2475-9066 68 2021-12-28
## member page prefix publisher score source reference.count
## 1 8722 2013 10.21105 The Open Journal 0 Crossref 12
## 2 8722 5174 10.21105 The Open Journal 0 Crossref 10
## 3 8722 5162 10.21105 The Open Journal 0 Crossref 24
## 4 8722 12 10.21105 The Open Journal 0 Crossref 4
## 5 8722 2181 10.21105 The Open Journal 0 Crossref 15
## 6 8722 3771 10.21105 The Open Journal 0 Crossref 25
## references.count is.referenced.by.count
## 1 12 6
## 2 10 0
## 3 24 0
## 4 4 5
## 5 15 5
## 6 25 5
## title
## 1 thresholdmodeling: A Python package for modeling excesses over a threshold using the Peak-Over-Threshold Method and the Generalized Pareto Distribution
## 2 PyBullet Industrial: A process-aware robot\nsimulation
## 3 XGI: A Python package for higher-order interaction\nnetworks
## 4 mst_clustering: Clustering via Euclidean Minimum Spanning Trees
## 5 ldaPrototype: A method in R to get a Prototype of multiple Latent Dirichlet Allocations
## 6 ORION2: A magnetohydrodynamics code for star formation
## type url volume
## 1 journal-article http://dx.doi.org/10.21105/joss.02013 5
## 2 journal-article http://dx.doi.org/10.21105/joss.05174 8
## 3 journal-article http://dx.doi.org/10.21105/joss.05162 8
## 4 journal-article http://dx.doi.org/10.21105/joss.00012 1
## 5 journal-article http://dx.doi.org/10.21105/joss.02181 5
## 6 journal-article http://dx.doi.org/10.21105/joss.03771 6
## short.container.title
## 1 JOSS
## 2 JOSS
## 3 JOSS
## 4 JOSS
## 5 JOSS
## 6 JOSS
## author
## 1 http://orcid.org/0000-0002-5829-7711, http://orcid.org/0000-0003-0170-6083, http://orcid.org/0000-0002-8166-5666, FALSE, FALSE, FALSE, Iago, Antônio, Marcus, Lemos, Lima, Duarte, first, additional, additional
## 2 http://orcid.org/0000-0002-7825-3476, NA, NA, http://orcid.org/0000-0003-0961-7675, FALSE, NA, NA, FALSE, Jan, Malte, Dominik, Prof. Dr.-Ing. Jürgen, Baumgärtner, Hansjosten, Schönhofen, Fleischer, first, additional, additional, additional
## 3 http://orcid.org/0000-0003-1270-4980, http://orcid.org/0000-0001-8087-2981, http://orcid.org/0000-0001-8794-6410, http://orcid.org/0000-0003-1847-5031, http://orcid.org/0000-0002-9146-8068, http://orcid.org/0000-0002-3047-4376, http://orcid.org/0000-0002-2675-2775, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, Nicholas W., Maxime, Iacopo, Giovanni, Alice, Alice, Leo, Landry, Lucas, Iacopini, Petri, Schwarze, Patania, Torres, first, additional, additional, additional, additional, additional, additional
## 4 http://orcid.org/0000-0002-9623-3401, FALSE, Jake, VanderPlas, first
## 5 http://orcid.org/0000-0002-0007-4478, FALSE, Jonas, Rieger, first
## 6 Pak, Andrew, Brandt, Richard, Mark, Aaron, Christopher, Stella, Anna, Aaron, Li, Cunningham, Gaches, Klein, Krumholz, Lee, McKee, Offner, Rosen, Skinner, first, additional, additional, additional, additional, additional, additional, additional, additional, additional, NA, NA, NA, NA, NA, http://orcid.org/0000-0002-8428-8050, NA, http://orcid.org/0000-0003-1252-9916, NA, NA, NA, NA, NA, NA, NA, FALSE, NA, FALSE, NA, NA
## subject
## 1 <NA>
## 2 Pulmonary and Respiratory Medicine,Pediatrics, Perinatology and Child Health
## 3 Pulmonary and Respiratory Medicine,Pediatrics, Perinatology and Child Health
## 4 <NA>
## 5 <NA>
## 6 <NA>
## citation_count
## 1 6
## 2 0
## 3 1
## 4 5
## 5 5
## 6 5
## api_title
## 1 thresholdmodeling: A Python package for modeling excesses over a threshold using the Peak-Over-Threshold Method and the Generalized Pareto Distribution
## 2 PyBullet Industrial: A process-aware robot simulation
## 3 XGI: A Python package for higher-order interaction networks
## 4 mst_clustering: Clustering via Euclidean Minimum Spanning Trees
## 5 ldaPrototype: A method in R to get a Prototype of multiple Latent Dirichlet Allocations
## 6 ORION2: A magnetohydrodynamics code for star formation
## api_state editor reviewers nbr_reviewers
## 1 accepted @drvinceknight @bahung,@kellieotto 2
## 2 accepted @adi3 @CameronDevine,@sea-bass 2
## 3 accepted @vissarion @arashbm,@MridulS 2
## 4 accepted @arfon @nicoguaro 1
## 5 accepted @karthik @tommyjones,@bstewart 2
## 6 accepted @dfm @zingale,@changgoo 2
## repo_url
## 1 https://github.com/iagolemos1/thresholdmodeling
## 2 https://github.com/WBK-Robotics/pybullet_industrial
## 3 https://github.com/ComplexGroupInteractions/xgi
## 4 http://github.com/jakevdp/mst_clustering
## 5 https://github.com/JonasRieger/ldaPrototype
## 6 https://bitbucket.org/orionmhdteam/orion2_release1/src/master/
## review_issue_id prereview_issue_id languages
## 1 2013 1999 Python
## 2 5174 4965
## 3 5162 5125 Python, Jupyter Notebook
## 4 12 NA Jupyter Notebook, Python
## 5 2181 2156 R
## 6 3771 3559 C++, Gnuplot, Fortran
## archive_doi
## 1 https://doi.org/10.5281/zenodo.3661338
## 2 https://doi.org/10.5281/zenodo.7833292
## 3 https://doi.org/10.5281/zenodo.7939055
## 4 https://doi.org/10.5281/zenodo.50995
## 5 https://doi.org/10.5281/zenodo.3945836
## 6 https://doi.org/10.5281/zenodo.5791188
## review_title
## 1 thresholdmodeling: A Python package for modeling excesses over a threshold using the Peak-Over-Threshold Method and the Generalized Pareto Distribution
## 2 Pybullet Industrial: A process-aware robot simulation
## 3 XGI: A Python package for higher-order interaction networks
## 4 <NA>
## 5 ldaPrototype: A method in R to get a Prototype of multiple Latent Dirichlet Allocations
## 6 ORION2: A magnetohydrodynamics code for star formation
## review_number review_state review_opened review_closed review_ncomments
## 1 2013 closed 2020-01-13 2020-02-10 69
## 2 5174 closed 2023-02-19 2023-05-30 67
## 3 5162 closed 2023-02-17 2023-05-17 65
## 4 NA <NA> <NA> <NA> NA
## 5 2181 closed 2020-05-03 2020-07-16 60
## 6 3771 closed 2021-09-27 2021-12-28 77
## review_labels
## 1 accepted,recommend-accept,published
## 2 accepted,TeX,recommend-accept,published,Track: 5 (DSAIS)
## 3 accepted,TeX,Python,Jupyter Notebook,recommend-accept,published,Track: 7 (CSISM)
## 4 <NA>
## 5 accepted,recommend-accept,published
## 6 accepted,Shell,Makefile,C++,recommend-accept,published
## prerev_title
## 1 thresholdmodeling: A Python package for modeling excesses over a threshold using the Peak-Over-Threshold Method and the Generalized Pareto Distribution
## 2 Pybullet Industrial: A process-aware robot simulation
## 3 XGI: A Python package for higher-order interaction networks
## 4 <NA>
## 5 ldaPrototype: A method in R to get a Prototype of multiple Latent Dirichlet Allocations
## 6 ORION2: A magnetohydrodynamics code for star formation
## prerev_state prerev_opened prerev_closed prerev_ncomments
## 1 closed 2020-01-07 2020-01-13 37
## 2 closed 2022-11-25 2023-02-19 29
## 3 closed 2023-02-03 2023-02-17 71
## 4 <NA> <NA> <NA> NA
## 5 closed 2020-03-10 2020-05-03 31
## 6 closed 2021-08-03 2021-09-27 22
## prerev_labels days_in_pre days_in_rev
## 1 TeX,Python 6 days 28 days
## 2 TeX,waitlisted,Track: 5 (DSAIS) 86 days 100 days
## 3 TeX,Python,Jupyter Notebook,Track: 7 (CSISM) 14 days 89 days
## 4 <NA> NA days NA days
## 5 TeX,R 54 days 74 days
## 6 Shell,Makefile,C++ 55 days 92 days
## to_review repo_created repo_updated repo_pushed repo_nbr_stars
## 1 TRUE 2019-12-27 2023-07-13 2020-12-24 29
## 2 TRUE 2022-05-11 2023-08-02 2023-08-20 19
## 3 TRUE 2021-09-08 2023-08-22 2023-08-22 126
## 4 FALSE 2015-10-28 2023-04-12 2016-05-16 78
## 5 TRUE 2019-05-21 2023-01-31 2023-01-31 7
## 6 TRUE <NA> <NA> <NA> NA
## repo_language repo_languages_bytes
## 1 Python Python:37812,TeX:3472
## 2 Python Python:162266
## 3 Jupyter Notebook Jupyter Notebook:3652502,Python:781106
## 4 Jupyter Notebook Jupyter Notebook:416862,Python:18113,TeX:1418,Makefile:249
## 5 R R:149724,TeX:5269,Shell:157
## 6 <NA> <NA>
## repo_topics
## 1
## 2 industrial-automation,robotics,simulation-framework
## 3 hypergraphs,higher-order-networks,network-science
## 4
## 5 topicmodeling,topicmodelling,lda,topic-models,topic-model,topic-similarities,text-mining,textdata,latent-dirichlet-allocation,modelselection,model-selection,reliability
## 6 <NA>
## repo_license repo_nbr_contribs repo_nbr_contribs_2ormore repo_info_obtained
## 1 lgpl-3.0 4 2 2023-08-23
## 2 mit 5 4 2023-08-23
## 3 other 12 8 2023-08-23
## 4 bsd-2-clause 1 1 2023-07-19
## 5 gpl-3.0 1 1 2023-08-09
## 6 <NA> NA NA <NA>
## published.date halfyear nbr_authors
## 1 2020-02-10 2020H1 3
## 2 2023-05-18 2023H1 4
## 3 2023-05-17 2023H1 7
## 4 2016-05-16 2016H1 1
## 5 2020-07-16 2020H2 1
## 6 2021-12-28 2021H2 10
saveRDS(papers, file = "joss_submission_analytics.rds")To read the current version of this file directly from GitHub, use the following code:
papers <- readRDS(gzcon(url("https://github.com/openjournals/joss-analytics/blob/gh-pages/joss_submission_analytics.rds?raw=true")))sessionInfo()## R version 4.3.1 (2023-06-16)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.6.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: UTC
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] readr_2.1.4 citecorp_0.3.0 plotly_4.10.2 DT_0.29
## [5] jsonlite_1.8.7 purrr_1.0.2 gh_1.4.0 lubridate_1.9.2
## [9] ggplot2_3.4.3 tidyr_1.3.0 dplyr_1.1.3 rcrossref_1.2.009
## [13] tibble_3.2.1
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.0 viridisLite_0.4.2 farver_2.1.1 viridis_0.6.4
## [5] urltools_1.7.3 fastmap_1.1.1 lazyeval_0.2.2 promises_1.2.1
## [9] digest_0.6.33 timechange_0.2.0 mime_0.12 lifecycle_1.0.3
## [13] ellipsis_0.3.2 magrittr_2.0.3 compiler_4.3.1 rlang_1.1.1
## [17] sass_0.4.7 tools_4.3.1 wordcloud_2.6 utf8_1.2.3
## [21] yaml_2.3.7 data.table_1.14.8 knitr_1.43 fauxpas_0.5.2
## [25] labeling_0.4.3 htmlwidgets_1.6.2 bit_4.0.5 curl_5.0.2
## [29] plyr_1.8.8 xml2_1.3.5 RColorBrewer_1.1-3 httpcode_0.3.0
## [33] miniUI_0.1.1.1 withr_2.5.0 triebeard_0.4.1 grid_4.3.1
## [37] fansi_1.0.4 xtable_1.8-4 colorspace_2.1-0 gitcreds_0.1.2
## [41] scales_1.2.1 crul_1.4.0 cli_3.6.1 rmarkdown_2.24
## [45] crayon_1.5.2 generics_0.1.3 httr_1.4.7 tzdb_0.4.0
## [49] cachem_1.0.8 stringr_1.5.0 splines_4.3.1 parallel_4.3.1
## [53] vctrs_0.6.3 Matrix_1.5-4.1 hms_1.1.3 bit64_4.0.5
## [57] crosstalk_1.2.0 jquerylib_0.1.4 glue_1.6.2 stringi_1.7.12
## [61] gtable_0.3.4 later_1.3.1 munsell_0.5.0 pillar_1.9.0
## [65] rappdirs_0.3.3 htmltools_0.5.6 R6_2.5.1 httr2_0.2.3
## [69] vroom_1.6.3 evaluate_0.21 shiny_1.7.5 lattice_0.21-8
## [73] highr_0.10 httpuv_1.6.11 bslib_0.5.1 Rcpp_1.0.11
## [77] gridExtra_2.3 nlme_3.1-162 mgcv_1.8-42 whisker_0.4.1
## [81] xfun_0.40 pkgconfig_2.0.3